In [1]:
# Enable IPython's matplotlib integration for this kernel. No backend name is
# given, so IPython chooses its default GUI backend.
# NOTE(review): that usually means figures open in separate windows rather than
# inline in the notebook — confirm this is intended.
%matplotlib
In [2]:
import sys
import pandas as pa
import numpy as np
from PrimeText import PrimeText
import matplotlib.pyplot as plt
In [3]:
# Read the comment export; the file is decoded as ISO-8859-1 (Latin-1).
ytData = pa.read_csv("utubeES.csv", encoding="ISO-8859-1")

# Only the 'comment' column is used by the rest of the notebook.
comments = ytData["comment"]

# Text-processing helper that every later cell works through.
pt = PrimeText()
In [9]:
# Run PrimeText's preparation steps over the comment column, in this order.
# NOTE(review): the per-step notes below are inferred from the method names only
# (PrimeText's implementation is not visible here) — confirm against the class.
pt.cleanData(comments)  # presumably normalises the raw comment text
pt.assembleDictionary()  # presumably builds the vocabulary from the cleaned text
pt.indexDictionary()  # presumably fills pt.indexedDictionary, which is iterated later
pt.indexComments()  # presumably encodes each comment so countInRecords can query it
In [12]:
# Collect every dictionary key, then ask PrimeText how many records contain it.
keyText = [key for key, _ in pt.indexedDictionary.items()]
keyCount = [pt.countInRecords([key]) for key in keyText]

# Counts labelled by key; keep the 50 largest for plotting and for the
# pairwise matrix built further down.
s1 = pa.Series(keyCount, index=keyText)
sortedS1 = s1.sort_values(ascending=False).head(50)

# Bar chart of the most frequent keys.
sortedS1.plot.bar()
Out[12]:
In [6]:
# Square matrix over the top keys (rows and columns share the same labels),
# pre-filled with integer zeros.
# Building it from the scalar 0 gives an integer dtype immediately. The previous
# empty-frame + fillna(0) route produced an object-dtype frame and relied on
# fillna downcasting it, which newer pandas deprecates; an object-dtype frame
# cannot be rendered by plt.imshow.
df = pa.DataFrame(0, index=sortedS1.index, columns=sortedS1.index)
In [7]:
# Fill the matrix: each cell holds pt.countInRecords for the (column, row) pair
# of keys. Every ordered pair is queried, including the diagonal.
names = sortedS1.index
colsdone = 0
for colsdone, col in enumerate(names, start=1):
    # "\r" rewrites the same console line; flush so the counter is visible
    # while the (potentially slow) inner loop runs.
    sys.stdout.write("\rCols done : %i" % colsdone)
    sys.stdout.flush()
    for row in names:
        # Single .loc assignment instead of df[col][row] = ...: chained
        # indexing may write to a temporary copy and never reach df.
        df.loc[row, col] = pt.countInRecords([col, row])
# Finish the progress line.
sys.stdout.write("\n")
sys.stdout.flush()
In [11]:
# Draw df as an image, one cell per key pair, without smoothing between cells.
imgplot = plt.imshow(df, interpolation="nearest")

# Label both axes with the key names; x labels are rotated to stay readable.
tick_positions = range(len(names))
for set_ticks, angle in ((plt.xticks, 90), (plt.yticks, 0)):
    set_ticks(tick_positions, names, rotation=angle)

# Colour scale for the counts, then display the figure.
plt.colorbar()
plt.show()
In [ ]:
In [ ]: